{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "QrX1A360Dt7e"
      },
      "source": [
        "This notebook uses the [HuggingChat API](https://github.com/Soulter/hugging-chat-api).\n",
        "The chat() function receives these parameters:\n",
        "\n",
        "* text: Required[str].\n",
        "* temperature: Optional[float]. Default is 0.9\n",
        "* top_p: Optional[float]. Default is 0.95\n",
        "* repetition_penalty: Optional[float]. Default is 1.2\n",
        "* top_k: Optional[int]. Default is 50\n",
        "* truncate: Optional[int]. Default is 1024\n",
        "* watermark: Optional[bool]. Default is False\n",
        "* max_new_tokens: Optional[int]. Default is 1024\n",
        "* stop: Optional[list]. Default is [\"\"]\n",
        "* return_full_text: Optional[bool]. Default is False\n",
        "* stream: Optional[bool]. Default is True\n",
        "* use_cache: Optional[bool]. Default is False\n",
        "* is_retry: Optional[bool]. Default is False\n",
        "* retry_count: Optional[int]. Number of retries when requesting HuggingChat. Default is 5"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "QPZn3YULrezS"
      },
      "source": [
        "# Imports"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#!pip install hugchat\n",
        "import json\n",
        "import os\n",
        "\n",
        "import pandas as pd\n",
        "from hugchat import hugchat"
      ],
      "metadata": {
        "id": "smergE6govV8"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Set up drive in folder with corpus table"
      ],
      "metadata": {
        "id": "CMu6flmVl8hq"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "q-i5-GZcr3pS",
        "outputId": "9ffd668d-1eb5-4c75-feda-45c1582a78a5"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Mounted at /content/drive\n"
          ]
        }
      ],
      "source": [
        "from google.colab import drive\n",
        "\n",
        "# Mount Google Drive at /content/drive, explicitly requesting a remount.\n",
        "drive.mount(\n",
        "    '/content/drive',\n",
        "    force_remount=True,\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "G2jY0-z6uhPo",
        "outputId": "8185079c-ab4f-4975-9365-af4de871c0ff"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "/content/drive/MyDrive/weimar_films_datasets\n"
          ]
        }
      ],
      "source": [
        "%cd /content/drive/MyDrive/weimar_films_datasets/"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Set up hugchat"
      ],
      "metadata": {
        "id": "vAatvJulmCjS"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# SECURITY: HuggingChat session cookies are credentials, so they are loaded from a local\n",
        "# JSON file instead of being hard-coded in the notebook. Export the huggingface.co cookies\n",
        "# from your browser as a JSON list, keep that file out of version control, and point the\n",
        "# HUGCHAT_COOKIES_PATH environment variable at it (default: \"cookies.json\" in the working directory).\n",
        "# NOTE(review): the cookie values previously embedded in this cell were exposed in the shared\n",
        "# notebook; log out of / invalidate that session so the old token can no longer be used.\n",
        "cookies_path = os.environ.get(\"HUGCHAT_COOKIES_PATH\", \"cookies.json\")\n",
        "\n",
        "with open(cookies_path, encoding=\"utf-8\") as cookies_file:\n",
        "    # Same structure the cell used before: a list of cookie dicts (domain, name, path, value, ...).\n",
        "    cookies = json.load(cookies_file)\n",
        "\n",
        "chatbot = hugchat.ChatBot(cookies=cookies)"
      ],
      "metadata": {
        "id": "jsV9KAROVGi5"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "To try and make this as reproducible as possible, I tried to put the \"temperature\" setting at 0. That didn't work, so I tinkered until I found the lowest setting that would still consistently run (0.001)."
      ],
      "metadata": {
        "id": "mgCXbwSoJ03I"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "ANSWER_COLUMN = \"HuggingChat API Answers\"\n",
        "\n",
        "# The results are written back to the same workbook, so drop an answers column left over\n",
        "# from a previous run; otherwise re-running this cell would create a duplicate column.\n",
        "df = pd.read_excel('Reviews Corpus.xlsx', index_col=[0])\n",
        "df = df.drop(columns=[ANSWER_COLUMN], errors=\"ignore\")\n",
        "\n",
        "temperature = 0.001  # lowest setting that still ran consistently (0 did not work)\n",
        "count = 0\n",
        "answer_dict = {}\n",
        "\n",
        "for index, row in df.iterrows():\n",
        "    review_text = row[\"Review_Eng\"]\n",
        "    prompt = f\"The following is a film review. Would you characterize it as negative or positive? {review_text}\"\n",
        "\n",
        "    try:\n",
        "        # Keyword argument: the value is bound to `temperature` regardless of parameter order.\n",
        "        answer_dict[index] = chatbot.chat(prompt, temperature=temperature)\n",
        "    except Exception as exc:\n",
        "        # Catch Exception rather than everything so that interrupting the kernel still stops\n",
        "        # the loop, and report the cause instead of silently discarding it.\n",
        "        print(f\"Request failed for row {index!r}: {exc!r}\")\n",
        "        answer_dict[index] = \"ERROR: check file\"\n",
        "\n",
        "    count += 1\n",
        "    print(f\"Number of texts processed: {count}\")\n",
        "\n",
        "# Create a new DataFrame containing the results and merge it with the original DataFrame\n",
        "df_answers = pd.DataFrame.from_dict(answer_dict, orient=\"index\", columns=[ANSWER_COLUMN])\n",
        "df_merged = pd.concat([df, df_answers], axis=1)\n",
        "\n",
        "# Save the results to the file\n",
        "df_merged.to_excel(\"Reviews Corpus.xlsx\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "n-BK9HjvbKao",
        "outputId": "6e1b5def-572a-4e7d-c0fb-ebf3073c7352"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Number of texts processed: 1\n",
            "Number of texts processed: 2\n",
            "Number of texts processed: 3\n",
            "Number of texts processed: 4\n",
            "Number of texts processed: 5\n",
            "Number of texts processed: 6\n",
            "Number of texts processed: 7\n",
            "Number of texts processed: 8\n",
            "Number of texts processed: 9\n",
            "Number of texts processed: 10\n",
            "Number of texts processed: 11\n",
            "Number of texts processed: 12\n",
            "Number of texts processed: 13\n",
            "Number of texts processed: 14\n",
            "Number of texts processed: 15\n",
            "Number of texts processed: 16\n",
            "Number of texts processed: 17\n",
            "Number of texts processed: 18\n",
            "Number of texts processed: 19\n",
            "Number of texts processed: 20\n",
            "Number of texts processed: 21\n",
            "Number of texts processed: 22\n",
            "Number of texts processed: 23\n",
            "Number of texts processed: 24\n",
            "Number of texts processed: 25\n",
            "Number of texts processed: 26\n",
            "Number of texts processed: 27\n",
            "Number of texts processed: 28\n",
            "Number of texts processed: 29\n",
            "Number of texts processed: 30\n",
            "Number of texts processed: 31\n",
            "Number of texts processed: 32\n",
            "Number of texts processed: 33\n",
            "Number of texts processed: 34\n",
            "Number of texts processed: 35\n",
            "Number of texts processed: 36\n",
            "Number of texts processed: 37\n",
            "Number of texts processed: 38\n",
            "Number of texts processed: 39\n",
            "Number of texts processed: 40\n",
            "Number of texts processed: 41\n",
            "Number of texts processed: 42\n",
            "Number of texts processed: 43\n",
            "Number of texts processed: 44\n",
            "Number of texts processed: 45\n",
            "Number of texts processed: 46\n",
            "Number of texts processed: 47\n",
            "Number of texts processed: 48\n",
            "Number of texts processed: 49\n",
            "Number of texts processed: 50\n",
            "Number of texts processed: 51\n",
            "Number of texts processed: 52\n",
            "Number of texts processed: 53\n",
            "Number of texts processed: 54\n",
            "Number of texts processed: 55\n",
            "Number of texts processed: 56\n",
            "Number of texts processed: 57\n",
            "Number of texts processed: 58\n",
            "Number of texts processed: 59\n",
            "Number of texts processed: 60\n",
            "Number of texts processed: 61\n",
            "Number of texts processed: 62\n",
            "Number of texts processed: 63\n",
            "Number of texts processed: 64\n",
            "Number of texts processed: 65\n",
            "Number of texts processed: 66\n",
            "Number of texts processed: 67\n",
            "Number of texts processed: 68\n",
            "Number of texts processed: 69\n",
            "Number of texts processed: 70\n",
            "Number of texts processed: 71\n",
            "Number of texts processed: 72\n",
            "Number of texts processed: 73\n",
            "Number of texts processed: 74\n",
            "Number of texts processed: 75\n",
            "Number of texts processed: 76\n",
            "Number of texts processed: 77\n",
            "Number of texts processed: 78\n",
            "Number of texts processed: 79\n",
            "Number of texts processed: 80\n"
          ]
        }
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}